원작자 : Maksym Shkliarevskyi
https://www.researchgate.net/profile/Maksym-Shkliarevskyi-2?fbclid=IwAR3bHR2k--e5yZOekorZFYQikVkzGf8Z4hcSY1z4TC0QUKtd5yePUxAQo34
Kaggle은 매년 이용자를 대상으로 설문조사(Survey)를 실시하고 그 결과를 공개한다. 2021년에는 2021.9.1부터 2021.10.4까지 설문을 진행하여 총 25,973개의 응답을 공개하였다.
단답형 및 복수선택 답변들은 각각 하나의 컬럼으로 구성되어 있음
library(tidyverse)
library(patchwork) #ggplot 그래프 이어붙이기 옆으로 이어붙일때는 + 아래로 이어붙일때는 슬래쉬 (/)
library(scales)
library(ggpubr) #geom braket
library(viridis) #color map
# Plot styling ----
# theme_minimal() becomes the session default; `my_theme` is added to each
# plot for a centred bold title and larger axis title/text.
theme_set(theme_minimal())
my_theme <- theme(
  plot.title = element_text(hjust = 0.5, face = 'bold', size = 18),
  plot.subtitle = element_text(hjust = 0.5, size = 13),
  axis.title = element_text(face = 'bold', size = 15),
  axis.text = element_text(size = 13)
)

# Custom palette. The plots below index it by survey year
# (5 = 2021, 4 = 2020, 3 = 2019, 2 = 2018, 1 = 2017).
my_palette <- c('#FFCB3E', '#FB836F', '#C1549C', '#7E549F', '#33546D')
show_col(my_palette, ncol = 1, labels = TRUE)

# Survey data ----
# For every year the first row is kept separately as the question-description
# row and the remaining rows are the responses.
# Each statement sits on its own line: previously the 2021 read was fused onto
# the show_col() call and the 2020 read was swallowed by a trailing comment,
# so `ks_2020` was never created.
ks_2021 <- read_csv('/Users/spark/OneDrive/ps/Kaggle/Survey/2021/kaggle_survey_2021_responses.csv')
ks_2021_descriptions <- ks_2021[1, ]  # question-description row
ks_2021 <- ks_2021[-1, ]              # everything except the description row

ks_2020 <- read_csv('/Users/spark/OneDrive/ps/Kaggle/Survey/2020/kaggle_survey_2020_responses.csv')
ks_2020_descriptions <- ks_2020[1, ]
ks_2020 <- ks_2020[-1, ]

ks_2019 <- read_csv('/Users/spark/OneDrive/ps/Kaggle/Survey/2019/multiple_choice_responses.csv')
ks_2019_descriptions <- ks_2019[1, ]
ks_2019 <- ks_2019[-1, ]

ks_2018 <- read_csv('/Users/spark/OneDrive/ps/Kaggle/Survey/2018/multipleChoiceResponses.csv')
ks_2018_descriptions <- ks_2018[1, ]
ks_2018 <- ks_2018[-1, ]

# NOTE(review): the 2017 `Age` column is later passed to cut() with numeric
# breaks, which suggests the 2017 file has no question-text row and that this
# drops one real response -- confirm against the CSV.
ks_2017 <- read_csv('/Users/spark/OneDrive/ps/Kaggle/Survey/2017/multipleChoiceResponses.csv')
ks_2017_descriptions <- ks_2017[1, ]
ks_2017 <- ks_2017[-1, ]

# Goal: visualise and analyse age, gender, education level, employment
# status, experience, etc. year by year.
Q1 What is your age (# years)?
# Age distribution of South Korean respondents, one bar chart per survey year.
# Every chart labels each bar with its share of that year's Korean responses
# and carries a "Responses: <total>" label at a hand-picked position.

# 2021
kor_age21 <- ks_2021 %>%
  filter(Q3 == 'South Korea') %>%
  count(Q1) %>%
  rename(Age = Q1, Count = n) %>%
  ggplot(aes(Age, Count)) +
  geom_col(fill = my_palette[5], color = 'black') +
  geom_label(aes(label = label_percent(accuracy = 0.1)(Count / sum(Count)))) +
  geom_label(aes(x = 9, y = 100, label = paste0('Responses: ', sum(Count))),
             col = my_palette[5], size = 6) +
  # NOTE(review): the bracket label is a hard-coded percentage, not computed
  # from the data; update it by hand if the input changes.
  geom_bracket(xmin = 2.5, xmax = 4.5, y.position = 115, label = "44.3%",
               tip.length = 0.02, size = 0.9, label.size = 6,
               color = my_palette[5]) +
  labs(x = '', title = 'Age distribution (South Korea)',
       subtitle = '(2021)') +
  my_theme

# 2020
kor_age20 <- ks_2020 %>%
  filter(Q3 == 'South Korea') %>%
  count(Q1) %>%
  rename(Age = Q1, Count = n) %>%
  ggplot(aes(Age, Count)) +
  geom_col(fill = my_palette[4], color = 'black') +
  geom_label(aes(label = label_percent(accuracy = 0.1)(Count / sum(Count)))) +
  geom_label(aes(x = 9, y = 40, label = paste0('Responses: ', sum(Count))),
             col = my_palette[4], size = 6) +
  labs(x = '', subtitle = '(2020)') +
  my_theme

# 2019
kor_age19 <- ks_2019 %>%
  filter(Q3 == 'South Korea') %>%
  count(Q1) %>%
  rename(Age = Q1, Count = n) %>%
  ggplot(aes(Age, Count)) +
  geom_col(fill = my_palette[3], color = 'black') +
  geom_label(aes(label = label_percent(accuracy = 0.1)(Count / sum(Count)))) +
  geom_label(aes(x = 8.5, y = 45, label = paste0('Responses: ', sum(Count))),
             col = my_palette[3], size = 6) +
  labs(x = '', subtitle = '(2019)') +
  my_theme

# 2018 (age is read from column Q2 in this year's file)
kor_age18 <- ks_2018 %>%
  filter(Q3 == 'South Korea') %>%
  count(Q2) %>%
  rename(Age = Q2, Count = n) %>%
  ggplot(aes(Age, Count)) +
  geom_col(fill = my_palette[2], color = 'black') +
  geom_label(aes(label = label_percent(accuracy = 0.1)(Count / sum(Count)))) +
  geom_label(aes(x = 8.5, y = 60, label = paste0('Responses: ', sum(Count))),
             col = my_palette[2], size = 6) +
  labs(x = '', subtitle = '(2018)',
       caption = '') +
  my_theme

# 2017 (numeric `Age` is binned into the bracket labels used by later surveys)
# Intervals are left-closed ([18, 22) -> "18-21") so each boundary age lands
# in the bracket its label names; cut()'s default right-closed intervals put
# 18 into "<18", 22 into "18-21", and so on. include.lowest = TRUE keeps an
# age of exactly 100 in the last bracket.
age_breaks <- c(0, 18, 22, 25, 30, 35, 40, 45, 50, 55, 60, 70, 80, 100)
age_labels <- c("<18", "18-21", "22-24", "25-29", "30-34", "35-39", "40-44",
                "45-49", "50-54", "55-59", "60-69", "70-79", "80+")
ks_2017$Age_range <- cut(ks_2017$Age, breaks = age_breaks,
                         labels = age_labels, right = FALSE,
                         include.lowest = TRUE)

kor_age17 <- ks_2017 %>%
  filter(Country == 'South Korea') %>%
  count(Age_range) %>%
  rename(Age = Age_range, Count = n) %>%
  # NOTE(review): this replaces slice(-n()), which presumably targeted the
  # trailing NA bucket but would drop the oldest real bracket when no NA row
  # exists -- confirm that dropping missing ages is the intent.
  filter(!is.na(Age)) %>%
  ggplot(aes(Age, Count)) +
  geom_col(fill = my_palette[1], color = 'black') +
  geom_label(aes(label = label_percent(accuracy = 0.1)(Count / sum(Count)))) +
  geom_label(aes(x = 10, y = 50, label = paste0('Responses: ', sum(Count))),
             col = my_palette[1], size = 6) +
  labs(x = '', subtitle = '(2017)',
       caption = '\u00A9 Seung Park') +
  my_theme

# Layout: 2021 spans the full top row; the earlier years share two rows below.
design <- 'AAAAAAAA
BBBBCCCC
DDDDEEEE'
kor_age21 + kor_age20 + kor_age19 + kor_age18 + kor_age17 +
  plot_layout(design = design)
# Author's note: in 2021 there were 359 respondents; people in their
# mid-to-late 20s made up 30% and people in their early-to-mid 30s 15%.
Q2 What is your gender? - Selected Choice
# Gender distribution of South Korean respondents, one horizontal bar chart
# per survey year. Bars are ordered by count and labelled with their share.

# 2021
kor_gender21 <- ks_2021 %>%
  filter(Q3 == 'South Korea') %>%
  count(Q2) %>%
  rename(Gender = Q2, Count = n) %>%
  ggplot(aes(Count, reorder(Gender, Count))) +
  geom_col(fill = my_palette[5], color = 'black') +
  geom_label(aes(label = label_percent(accuracy = 0.1)(Count / sum(Count)))) +
  labs(y = '', title = 'Gender distribution (South Korea)',
       subtitle = '(2021)') +
  my_theme

# 2020
kor_gender20 <- ks_2020 %>%
  filter(Q3 == 'South Korea') %>%
  count(Q2) %>%
  rename(Gender = Q2, Count = n) %>%
  ggplot(aes(Count, reorder(Gender, Count))) +
  geom_col(fill = my_palette[4], color = 'black') +
  geom_label(aes(label = label_percent(accuracy = 0.1)(Count / sum(Count)))) +
  labs(y = '', subtitle = '(2020)') +
  my_theme

# 2019
kor_gender19 <- ks_2019 %>%
  filter(Q3 == 'South Korea') %>%
  count(Q2) %>%
  rename(Gender = Q2, Count = n) %>%
  ggplot(aes(Count, reorder(Gender, Count))) +
  geom_col(fill = my_palette[3], color = 'black') +
  geom_label(aes(label = label_percent(accuracy = 0.1)(Count / sum(Count)))) +
  labs(y = '', subtitle = '(2019)') +
  my_theme

# 2018 (gender is read from column Q1 in this year's file)
kor_gender18 <- ks_2018 %>%
  filter(Q3 == 'South Korea') %>%
  count(Q1) %>%
  rename(Gender = Q1, Count = n) %>%
  ggplot(aes(Count, reorder(Gender, Count))) +
  geom_col(fill = my_palette[2], color = 'black') +
  geom_label(aes(label = label_percent(accuracy = 0.1)(Count / sum(Count)))) +
  labs(y = '', subtitle = '(2018)',
       caption = '') +
  my_theme

# 2017 (uses the `Country` and `GenderSelect` columns)
kor_gender17 <- ks_2017 %>%
  filter(Country == 'South Korea') %>%
  count(GenderSelect) %>%
  rename(Gender = GenderSelect, Count = n) %>%
  # Shorten the long non-binary label so it fits on the axis.
  mutate(Gender = str_replace(Gender, 'Non-binary, genderqueer, or gender non-conforming', 'Non-binary')) %>%
  # Drop respondents with no gender recorded (tested directly with is.na()
  # instead of round-tripping through the string 'NA').
  filter(!is.na(Gender)) %>%
  ggplot(aes(Count, reorder(Gender, Count))) +
  geom_col(fill = my_palette[1], color = 'black') +
  geom_label(aes(label = label_percent(accuracy = 0.1)(Count / sum(Count)))) +
  labs(y = '', subtitle = '(2017)',
       caption = '\u00A9 Seung Park') +
  my_theme

# Layout: 2021 spans the full top row; the earlier years share two rows below.
design <- 'AAAAAAAA
BBBBCCCC
DDDDEEEE'
kor_gender21 + kor_gender20 + kor_gender19 + kor_gender18 + kor_gender17 +
  plot_layout(design = design)
# Author's note: the male-to-female ratio is roughly 8:2 overall.
# Gender-by-age heatmap for South Korean respondents (2021).
# `rate` is each (gender, age) cell's share of all Korean respondents whose
# gender is one of `genders`.
genders <- c('Nonbinary', 'Woman', 'Man')

kor_gend_age21 <- ks_2021 %>%
  filter(Q3 == 'South Korea') %>%
  count(Q2, Q1) %>%
  rename(Gender = Q2, Age = Q1, Count = n) %>%
  filter(Gender %in% genders) %>%
  mutate(rate = Count / sum(Count)) %>%
  # Add the gender/age combinations nobody selected so every tile is drawn;
  # their rate is 0 (Count stays NA for those rows).
  complete(Gender,
           Age = c("18-21", "22-24", "25-29", "30-34", "35-39", "40-44",
                   "45-49", "50-54", "55-59", "60-69"),
           fill = list(rate = 0))

# Fix the row order of the heatmap to the order given in `genders`.
kor_gend_age21$Gender <- factor(kor_gend_age21$Gender, levels = genders)

kor_gend_age21 %>%
  ggplot(aes(Age, Gender)) +
  geom_tile(aes(fill = rate), show.legend = FALSE, color = "gray30") +
  geom_text(aes(label = label_percent(accuracy = 0.01)(rate)), size = 5) +
  scale_fill_viridis(option = 'viridis', begin = 0.4, end = 1) +
  labs(title = 'Gender by Age (South Korea)',
       subtitle = '(2021)',
       caption = '\u00A9 Seung Park') +
  my_theme +
  theme(panel.grid = element_blank())

# Q4 What is the highest level of formal education that you have attained or
# plan to attain within the next 2 years? (2017 excluded)
# Education level of South Korean respondents, one horizontal bar chart per
# survey year (2018-2021). The two longest answer texts are shortened,
# 'I prefer not to answer' is excluded, and each bar is labelled with its
# share of the remaining responses.

# 2021
kor_ed21 <- ks_2021 %>%
  filter(Q3 == 'South Korea') %>%
  count(Q4) %>%
  mutate(Q4 = str_replace(Q4, 'No formal education past high school', 'High school')) %>%
  mutate(Q4 = str_replace(Q4, 'Some college/university study without earning a bachelor’s degree', 'Some college/university')) %>%
  rename(Education = Q4, Count = n) %>%
  filter(Education != 'I prefer not to answer') %>%
  ggplot(aes(Count, reorder(Education, Count))) +
  geom_col(fill = my_palette[5], color = 'black') +
  geom_label(aes(label = label_percent(accuracy = 0.1)(Count / sum(Count)))) +
  labs(y = '', subtitle = '(2021)') +
  my_theme

# 2020
kor_ed20 <- ks_2020 %>%
  filter(Q3 == 'South Korea') %>%
  count(Q4) %>%
  mutate(Q4 = str_replace(Q4, 'No formal education past high school', 'High school')) %>%
  mutate(Q4 = str_replace(Q4, 'Some college/university study without earning a bachelor’s degree', 'Some college/university')) %>%
  rename(Education = Q4, Count = n) %>%
  filter(Education != 'I prefer not to answer') %>%
  ggplot(aes(Count, reorder(Education, Count))) +
  geom_col(fill = my_palette[4], color = 'black') +
  geom_label(aes(label = label_percent(accuracy = 0.1)(Count / sum(Count)))) +
  labs(y = '', subtitle = '(2020)') +
  my_theme

# 2019
kor_ed19 <- ks_2019 %>%
  filter(Q3 == 'South Korea') %>%
  count(Q4) %>%
  mutate(Q4 = str_replace(Q4, 'No formal education past high school', 'High school')) %>%
  mutate(Q4 = str_replace(Q4, 'Some college/university study without earning a bachelor’s degree', 'Some college/university')) %>%
  rename(Education = Q4, Count = n) %>%
  filter(Education != 'I prefer not to answer') %>%
  ggplot(aes(Count, reorder(Education, Count))) +
  geom_col(fill = my_palette[3], color = 'black') +
  geom_label(aes(label = label_percent(accuracy = 0.1)(Count / sum(Count)))) +
  labs(y = '', subtitle = '(2019)',
       caption = '') +
  my_theme

# 2018
kor_ed18 <- ks_2018 %>%
  filter(Q3 == 'South Korea') %>%
  count(Q4) %>%
  mutate(Q4 = str_replace(Q4, 'No formal education past high school', 'High school')) %>%
  mutate(Q4 = str_replace(Q4, 'Some college/university study without earning a bachelor’s degree', 'Some college/university')) %>%
  rename(Education = Q4, Count = n) %>%
  filter(Education != 'I prefer not to answer') %>%
  ggplot(aes(Count, reorder(Education, Count))) +
  geom_col(fill = my_palette[2], color = 'black') +
  geom_label(aes(label = label_percent(accuracy = 0.1)(Count / sum(Count)))) +
  labs(y = '', subtitle = '(2018)',
       caption = '\u00A9 Seung Park') +
  my_theme

# Layout: a 2 x 2 grid with a shared title.
design <- 'AAAABBBB
CCCCDDDD'
kor_ed21 + kor_ed20 + kor_ed19 + kor_ed18 + plot_layout(design = design) +
  plot_annotation(title = "Education level distribution (South Korea)",
                  theme = my_theme)
# Author's note: in 2021 bachelor's degrees (36.6%) outnumbered master's
# degrees (26.5%); in 2020 bachelor's were 26.7% and master's 32.6%.
Q5 Select the title most similar to your current role (or most recent title if retired)
# Current job title of South Korean respondents.
# Step 1: per-year tables of Role / Count / Rate, sorted by descending count.
# The role and country columns differ between survey files.
kor_role21 <- ks_2021 %>%
  filter(Q3 == 'South Korea') %>%
  count(Q5) %>%
  arrange(-n) %>%
  rename(Role = Q5, Count = n) %>%
  mutate(Rate = Count / sum(Count))

kor_role20 <- ks_2020 %>%
  filter(Q3 == 'South Korea') %>%
  count(Q5) %>%
  arrange(-n) %>%
  rename(Role = Q5, Count = n) %>%
  mutate(Rate = Count / sum(Count))

kor_role19 <- ks_2019 %>%
  filter(Q3 == 'South Korea') %>%
  count(Q5) %>%
  arrange(-n) %>%
  rename(Role = Q5, Count = n) %>%
  mutate(Rate = Count / sum(Count))

kor_role18 <- ks_2018 %>%
  filter(Q3 == 'South Korea') %>%
  count(Q6) %>%
  arrange(-n) %>%
  rename(Role = Q6, Count = n) %>%
  mutate(Rate = Count / sum(Count))

kor_role17 <- ks_2017 %>%
  filter(Country == 'South Korea') %>%
  count(CurrentJobTitleSelect) %>%
  arrange(-n) %>%
  rename(Role = CurrentJobTitleSelect, Count = n) %>%
  mutate(Rate = Count / sum(Count))

# Step 2: one wide table of rates with a column per year, keyed on the 2021
# roles. Each Rate column is named for its year before joining, so the result
# does not depend on auto-generated join suffixes or on column positions.
kor_roles <- kor_role21 %>%
  select(Role, `2021` = Rate) %>%
  left_join(select(kor_role20, Role, `2020` = Rate), by = 'Role') %>%
  left_join(select(kor_role19, Role, `2019` = Rate), by = 'Role') %>%
  left_join(select(kor_role18, Role, `2018` = Rate), by = 'Role') %>%
  left_join(select(kor_role17, Role, `2017` = Rate), by = 'Role')

# Step 3: bar chart of the 2021 roles.
kor_r21 <- kor_role21 %>%
  ggplot(aes(Count, reorder(Role, Count))) +
  geom_col(fill = my_palette[5], color = 'black') +
  geom_label(aes(label = label_percent(accuracy = 0.01)(Rate))) +
  labs(y = 'Position', title = 'Current position (South Korea)',
       subtitle = '(2021)') +
  my_theme

# Step 4: lollipop chart of (past-year rate - 2021 rate) per role, faceted by
# year. Roles that have no match in a past year yield NA differences.
kor_r_20_17 <- kor_roles %>%
  mutate(`2020` = `2020` - `2021`,
         `2019` = `2019` - `2021`,
         `2018` = `2018` - `2021`,
         `2017` = `2017` - `2021`) %>%
  select(1, `2020`:`2017`) %>%
  gather(`2020`:`2017`, key = Year, value = Value) %>%
  # Push the label to the right of positive points and to the left of the rest.
  mutate(hjust = if_else(Value > 0, -0.5, 1.5)) %>%
  ggplot(aes(y = Role, x = Value, color = Value > 0)) +
  geom_vline(xintercept = 0, color = 'gray50') +  # vertical reference line at 0
  geom_point(stat = "identity", show.legend = FALSE, size = 7) +
  geom_segment(aes(x = 0, xend = Value,
                   y = Role, yend = Role), size = 1.2, show.legend = FALSE) +
  geom_label(aes(label = label_percent(accuracy = 0.01)(Value), hjust = hjust),
             show.legend = FALSE, size = 4) +
  scale_x_continuous(limits = c(-0.1, 0.1),
                     breaks = c(-0.08, -0.04, 0, 0.04, 0.08),
                     labels = label_percent(accuracy = 1)(c(-0.08, -0.04, 0, 0.04, 0.08))) +
  scale_color_manual(values = c('#E61C5D', '#0A516D')) +
  facet_wrap(~Year, nrow = 2, as.table = FALSE) +
  labs(title = 'Current positions by past years (South Korea)',
       subtitle = '(differences in comparison with 2021)',
       x = 'Difference, %',
       y = 'Position',
       caption = '\u00A9 Seung Park') +
  theme(panel.grid.minor.y = element_blank(),
        panel.grid.minor.x = element_blank(),
        panel.grid = element_line(linetype = 'dashed', size = 0.4),
        strip.text = element_text(size = 13, face = 'bold'),
        strip.background = element_rect(color = 'black', fill = 'gray95'),
        strip.switch.pad.wrap = unit(500, 'mm')) +
  my_theme

# Layout: the 2021 bar chart on top, the comparison chart twice as tall below.
design <- 'AAAAAA
BBBBBB
BBBBBB'
kor_r21 + kor_r_20_17 + plot_layout(design = design)
# Author's note: 27.3% of the 2021 respondents were students (bachelor's or
# master's); in 2020 students were 25%.
# Position-by-education heatmap for South Korean respondents (2021).
# `levels` fixes the left-to-right order of the education columns.
levels <- c("High school", "Some college/university", "Bachelor’s degree",
            "Master’s degree", "Professional doctorate", "Doctoral degree")

kor_exp_ed21 <- ks_2021 %>%
  filter(Q3 == 'South Korea') %>%
  count(Q5, Q4) %>%
  mutate(Q4 = str_replace(Q4, 'No formal education past high school', 'High school')) %>%
  mutate(Q4 = str_replace(Q4, 'Some college/university study without earning a bachelor’s degree', 'Some college/university')) %>%
  rename(Position = Q5, Education = Q4, Count = n) %>%
  filter(Education != 'I prefer not to answer') %>%
  # Share of each (position, education) cell among the remaining respondents.
  mutate(rate = Count / sum(Count)) %>%
  # Add the combinations nobody selected. Count is filled with 0 as well as
  # rate: leaving it NA makes reorder() score most positions as NA, so the
  # rows were not actually ordered by size.
  complete(Position, Education = levels, fill = list(rate = 0, Count = 0L))

kor_exp_ed21$Education <- factor(kor_exp_ed21$Education, levels = levels)

kor_exp_ed21 %>%
  # Order positions by their total number of respondents.
  ggplot(aes(Education, reorder(Position, Count, FUN = sum))) +
  geom_tile(aes(fill = rate), show.legend = FALSE, color = "gray30") +
  geom_text(aes(label = label_percent(accuracy = 0.01)(rate)), size = 5) +
  scale_fill_viridis(option = 'viridis', begin = 0.4, end = 1) +
  labs(y = 'Position',
       title = 'Position by Education level (South Korea)',
       subtitle = '(2021)',
       caption = '\u00A9 Seung Park') +
  my_theme +
  theme(panel.grid = element_blank())

# Q6 For how many years have you been writing code and/or programming?
# Position-by-coding-experience heatmap for South Korean respondents (2021).
# `exp_lev` fixes the left-to-right order of the experience columns; the
# ' years' suffix is stripped from the answers so they match these labels.
exp_lev <- c('I have never written code', '< 1', '1-3', '3-5', '5-10', '10-20', '20+')

kor_exp21 <- ks_2021 %>%
  filter(Q3 == 'South Korea') %>%
  select(Q5:Q6) %>%
  mutate(Q6 = str_replace(Q6, ' years', '')) %>%
  count(Q5, Q6) %>%
  # Share of each (position, experience) cell among all Korean respondents.
  mutate(rate = n / sum(n)) %>%
  rename(Position = Q5, Experience = Q6, Count = n) %>%
  # Add the combinations nobody selected. Count is filled with 0 as well as
  # rate: leaving it NA makes reorder() score most positions as NA, so the
  # rows were not actually ordered by size.
  complete(Position, Experience = exp_lev, fill = list(rate = 0, Count = 0L))

kor_exp21$Experience <- factor(kor_exp21$Experience, levels = exp_lev)

kor_exp21 %>%
  # Order positions by their total number of respondents.
  ggplot(aes(Experience, reorder(Position, Count, FUN = sum))) +
  geom_tile(aes(fill = rate), show.legend = FALSE, color = "gray30") +
  geom_text(aes(label = label_percent(accuracy = 0.01)(rate)), size = 5) +
  scale_fill_viridis(option = 'viridis', begin = 0.4, end = 1) +
  labs(y = 'Position',
       title = 'Position by Experience groups',
       subtitle = '(2021)',
       caption = '\u00A9 Seung Park') +
  my_theme +
  theme(panel.grid = element_blank())

# Q7 What programming languages do you use on a regular basis?
# (multiple answers allowed)
# Programming languages used regularly by South Korean respondents.
# Step 1: reshape each year's multi-select language columns to long form, one
# row per (respondent id, selected language); unselected options are NA and
# are dropped. row_number() is used for the id because 1:n() misbehaves when
# the filter returns zero rows.
kor_lang2021 <- ks_2021 %>%
  filter(Q3 == 'South Korea') %>%
  select(Q1:Q7_OTHER) %>%
  mutate(id = row_number()) %>%
  gather(Q7_Part_1:Q7_OTHER, key = 'key', value = 'Language') %>%
  filter(!is.na(Language))

kor_lang2020 <- ks_2020 %>%
  filter(Q3 == 'South Korea') %>%
  select(Q1:Q7_OTHER) %>%
  mutate(id = row_number()) %>%
  gather(Q7_Part_1:Q7_OTHER, key = 'key', value = 'Language') %>%
  filter(!is.na(Language))

# The 2019 file holds the language options in Q18_Part_1..Q18_Part_12.
kor_lang2019 <- ks_2019 %>%
  filter(Q3 == 'South Korea') %>%
  select(c(Q1, Q2, Q3, Q4, Q5), Q18_Part_1:Q18_Part_12) %>%
  mutate(id = row_number()) %>%
  gather(Q18_Part_1:Q18_Part_12, key = 'key', value = 'Language') %>%
  filter(!is.na(Language))

# The 2018 file holds the language options in Q16_Part_1..Q16_Part_18.
kor_lang2018 <- ks_2018 %>%
  filter(Q3 == 'South Korea') %>%
  select(c(Q1, Q2, Q3, Q4, Q6), Q16_Part_1:Q16_Part_18) %>%
  mutate(id = row_number()) %>%
  gather(Q16_Part_1:Q16_Part_18, key = 'key', value = 'Language') %>%
  filter(!is.na(Language))

# Step 2: one bar chart per year. `rate` divides by the number of Korean
# respondents (not the number of selections), so the shares can sum to more
# than 100%.
kor_lang_n21 <- kor_lang2021 %>%
  count(Language) %>%
  arrange(-n) %>%
  mutate(rate = n / nrow(ks_2021 %>% filter(Q3 == 'South Korea'))) %>%
  rename(Count = n) %>%
  ggplot(aes(reorder(Language, -Count), Count)) +
  geom_col(fill = my_palette[5], color = 'black') +
  geom_label(aes(label = label_percent(accuracy = 0.1)(rate))) +
  labs(x = '', title = 'Programming languages that used on a regular basis (South Korea)',
       subtitle = '(2021)') +
  my_theme

kor_lang_n20 <- kor_lang2020 %>%
  count(Language) %>%
  arrange(-n) %>%
  mutate(rate = n / nrow(ks_2020 %>% filter(Q3 == 'South Korea'))) %>%
  rename(Count = n) %>%
  ggplot(aes(reorder(Language, -Count), Count)) +
  geom_col(fill = my_palette[4], color = 'black') +
  geom_label(aes(label = label_percent(accuracy = 0.1)(rate))) +
  labs(x = '', subtitle = '(2020)') +
  my_theme

kor_lang_n19 <- kor_lang2019 %>%
  count(Language) %>%
  arrange(-n) %>%
  mutate(rate = n / nrow(ks_2019 %>% filter(Q3 == 'South Korea'))) %>%
  rename(Count = n) %>%
  ggplot(aes(reorder(Language, -Count), Count)) +
  geom_col(fill = my_palette[3], color = 'black') +
  geom_label(aes(label = label_percent(accuracy = 0.1)(rate))) +
  labs(x = '', subtitle = '(2019)') +
  my_theme

kor_lang_n18 <- kor_lang2018 %>%
  count(Language) %>%
  arrange(-n) %>%
  mutate(rate = n / nrow(ks_2018 %>% filter(Q3 == 'South Korea'))) %>%
  rename(Count = n) %>%
  # Break the two longest labels over two lines so the axis text fits.
  mutate(Language = str_replace(Language, 'Javascript/Typescript', 'Javascript/\nTypescript')) %>%
  mutate(Language = str_replace(Language, 'Visual Basic/VBA', 'Visual Basic/\nVBA')) %>%
  ggplot(aes(reorder(Language, -Count), Count)) +
  geom_col(fill = my_palette[2], color = 'black') +
  geom_label(aes(label = label_percent(accuracy = 0.1)(rate))) +
  labs(x = '', subtitle = '(2018)',
       caption = '\u00A9 Seung Park') +
  my_theme

# Layout: the four years stacked vertically.
design <- 'AAAA
BBBB
CCCC
DDDD'
kor_lang_n21 + kor_lang_n20 + kor_lang_n19 + kor_lang_n18 +
  plot_layout(design = design)
# Author's note: in 2021, 84.4% of respondents said they use Python, an
# increase of more than 10% in Python users compared with 2020.
# R users, by contrast, came to 24.8%, up slightly from 23.7% the year before.
Q8 What programming language would you recommend an aspiring data scientist to learn first? - Selected Choice
# Language that South Korean respondents recommend learning first (2021).
# Missing answers are dropped before the shares are computed, so `rate` is
# relative to those who answered Q8.
ks_2021 %>%
  filter(Q3 == 'South Korea') %>%
  count(Q8) %>%
  filter(!is.na(Q8)) %>%
  arrange(-n) %>%
  rename(Language = Q8, Count = n) %>%
  mutate(rate = Count / sum(Count)) %>%
  ggplot(aes(reorder(Language, -Count), Count)) +
  geom_col(fill = my_palette[5], color = 'black') +
  geom_label(aes(label = label_percent(accuracy = 0.1)(rate))) +
  labs(x = '', title = 'Programming languages that you recommend to learn first (South Korea)',
       subtitle = '(2021)') +
  my_theme
# Author's note: 81.5% of respondents recommended Python.